Demonstrating stock price prediction methods to identify trades across three tech firms — Intel, Nvidia, and AMD. The data is captured across two years, from March 2019 to March 2021 (matching the download window used in the code below), and is extracted from Yahoo Finance.
Columns extracted from Yahoo Finance:
1. "Date": trading date of a stock
2. "Open": opening price of a stock for the day
3. "High": the stock's highest trading price of the day
4. "Low": the stock's lowest trading price of the day
5. "Close": closing price of a stock
6. "Adj.Close": the closing price after adjustments for all applicable splits and dividend distributions
7. "Volume": the number of a stock's shares traded on a stock exchange in a day or over a period of time
The data is captured at day level for all the tech firms
#### Loading required packages
library(prophet)
## Loading required package: Rcpp
## Loading required package: rlang
## Warning: package 'rlang' was built under R version 4.1.2
library(quantmod)
## Warning: package 'quantmod' was built under R version 4.1.2
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(forecast)
## Warning: package 'forecast' was built under R version 4.1.2
library(tseries)
library(timeSeries)
## Loading required package: timeDate
##
## Attaching package: 'timeSeries'
## The following object is masked from 'package:zoo':
##
## time<-
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:timeSeries':
##
## filter, lag
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(fGarch)
## Loading required package: fBasics
##
## Attaching package: 'fBasics'
## The following object is masked from 'package:TTR':
##
## volatility
library(xts)
# Date window shared by all three tickers; kept in one place so the
# downloaded series stay aligned and the window is changed in one spot.
start_date <- "2019-03-09"
end_date <- "2021-03-09"

# Pull daily OHLCV data from Yahoo Finance for each firm.
# auto.assign = FALSE returns the xts object instead of assigning by symbol name.
nvidia <- getSymbols("NVDA", src = "yahoo", from = start_date, to = end_date, auto.assign = FALSE)
amd <- getSymbols("AMD", src = "yahoo", from = start_date, to = end_date, auto.assign = FALSE)
intel <- getSymbols("INTC", src = "yahoo", from = start_date, to = end_date, auto.assign = FALSE)

# Keep only the closing-price column for the analysis below.
indf_data1 <- Cl(nvidia)
indf_data2 <- Cl(amd)
indf_data3 <- Cl(intel)
Nvidia stock market plot
# Nvidia: closing-price chart with common technical indicators overlaid.
chart_Series(indf_data1, col = "black")
add_SMA(n = 100, on = 1, col = "red")    # long-run (100-day) simple moving average
add_SMA(n = 20, on = 1, col = "black")   # short-run (20-day) simple moving average
add_RSI(n = 14, maType = "SMA")          # 14-day relative strength index
add_BBands(n = 20, maType = "SMA", sd = 1, on = -1)  # Bollinger bands on a 20-day SMA, 1 sd
# NOTE(review): the conventional MACD slow period is 26; slow = 25 may be a typo — confirm.
add_MACD(fast = 12, slow = 25, signal = 9, maType = "SMA", histogram = TRUE)
AMD stock market plot
# AMD: closing-price chart with the same set of technical indicators.
chart_Series(indf_data2, col = "black")
add_SMA(n = 100, on = 1, col = "red")    # long-run (100-day) simple moving average
add_SMA(n = 20, on = 1, col = "black")   # short-run (20-day) simple moving average
add_RSI(n = 14, maType = "SMA")          # 14-day relative strength index
add_BBands(n = 20, maType = "SMA", sd = 1, on = -1)  # Bollinger bands on a 20-day SMA, 1 sd
# NOTE(review): the conventional MACD slow period is 26; slow = 25 may be a typo — confirm.
add_MACD(fast = 12, slow = 25, signal = 9, maType = "SMA", histogram = TRUE)
Intel stock market plot
# Intel: closing-price chart with the same set of technical indicators.
chart_Series(indf_data3, col = "black")
add_SMA(n = 100, on = 1, col = "red")    # long-run (100-day) simple moving average
add_SMA(n = 20, on = 1, col = "black")   # short-run (20-day) simple moving average
add_RSI(n = 14, maType = "SMA")          # 14-day relative strength index
add_BBands(n = 20, maType = "SMA", sd = 1, on = -1)  # Bollinger bands on a 20-day SMA, 1 sd
# NOTE(review): the conventional MACD slow period is 26; slow = 25 may be a typo — confirm.
add_MACD(fast = 12, slow = 25, signal = 9, maType = "SMA", histogram = TRUE)
# Log-transform the closing prices before modelling (stabilises multiplicative
# price movements into additive ones).
indf_log1 <- log(indf_data1)
indf_log2 <- log(indf_data2)
indf_log3 <- log(indf_data3)
# Sanity-check the first and last observations of each log-price series.
head(indf_log1, n = 2)
## NVDA.Close
## 2019-03-11 3.695979
## 2019-03-12 3.704507
tail(indf_log1, n = 2)
## NVDA.Close
## 2021-03-05 4.825229
## 2021-03-08 4.753008
head(indf_log2, n = 2)
## AMD.Close
## 2019-03-11 3.133754
## 2019-03-12 3.156575
tail(indf_log2, n = 2)
## AMD.Close
## 2021-03-05 4.363353
## 2021-03-08 4.303524
head(indf_log3, n = 2)
## INTC.Close
## 2019-03-11 3.976874
## 2019-03-12 3.980989
tail(indf_log3, n = 2)
## INTC.Close
## 2021-03-05 4.106602
## 2021-03-08 4.091841
# Visual inspection of the log-price series.
plot(indf_log1, main = "log Nvidia chart")
plot(indf_log2, main = "log AMD chart")
plot(indf_log3, main = "log Intel chart")
# Inspect the autocorrelation structure of the log prices up to lag 320.
# These plots inform the stationarity discussion and differencing step below.
acf_log1 <- acf(indf_log1, lag.max = 320)
acf_log2 <- acf(indf_log2, lag.max = 320)
acf_log3 <- acf(indf_log3, lag.max = 320)
# Partial autocorrelations over the same lag range.
pacf_log1 <- pacf(indf_log1, lag.max = 320)
pacf_log2 <- pacf(indf_log2, lag.max = 320)
pacf_log3 <- pacf(indf_log3, lag.max = 320)
# First-difference each log-price series (lag-1 differences of logs, i.e. the
# daily log returns). diff() leaves a leading NA; na.locf() with
# fromLast = TRUE back-fills it from the next observation, and na.rm = TRUE
# drops any NA that remains.
indf_diff1 <- na.locf(diff(indf_log1, lag = 1), na.rm = TRUE, fromLast = TRUE)
indf_diff2 <- na.locf(diff(indf_log2, lag = 1), na.rm = TRUE, fromLast = TRUE)
indf_diff3 <- na.locf(diff(indf_log3, lag = 1), na.rm = TRUE, fromLast = TRUE)

# Visual check: the differenced series should fluctuate around a constant level.
plot(indf_diff1)
plot(indf_diff2)
plot(indf_diff3)
As the log-price data is not stationary, we difference it at lag 1 to make it stationary.
Augmented Dickey Fuller Test
# Augmented Dickey-Fuller tests on the log-price series.
# Passing the full alternative vector is collapsed by match.arg() to its first
# element, so "stationary" is written explicitly here; behaviour is identical.
# k = 0 includes no lagged differences, i.e. a plain Dickey-Fuller regression.
adf1 <- adf.test(indf_log1, alternative = "stationary", k = 0)
adf2 <- adf.test(indf_log2, alternative = "stationary", k = 0)
adf3 <- adf.test(indf_log3, alternative = "stationary", k = 0)
# Large p-values below: we fail to reject the unit-root null, i.e. the
# log prices are non-stationary, which justifies the differencing above.
adf1
##
## Augmented Dickey-Fuller Test
##
## data: indf_log1
## Dickey-Fuller = -2.4229, Lag order = 0, p-value = 0.3993
## alternative hypothesis: stationary
adf2
##
## Augmented Dickey-Fuller Test
##
## data: indf_log2
## Dickey-Fuller = -2.9462, Lag order = 0, p-value = 0.1778
## alternative hypothesis: stationary
adf3
##
## Augmented Dickey-Fuller Test
##
## data: indf_log3
## Dickey-Fuller = -2.8627, Lag order = 0, p-value = 0.2132
## alternative hypothesis: stationary
# NOTE(review): caTools is loaded but no caTools function appears to be used
# below — confirm whether this dependency is still needed.
library(caTools)

# Use the first 365 differenced observations as the training window; the
# remainder of each series is implicitly held out. Naming the cutoff keeps the
# three splits consistent and makes the window easy to change.
train_size <- 365
train_data1 <- indf_diff1[1:train_size]
train_data2 <- indf_diff2[1:train_size]
train_data3 <- indf_diff3[1:train_size]
library(forecast)  # already loaded above; harmless repeated call
set.seed(123)  # fix the RNG state for reproducibility of this section
# Automatic ARMA order selection on the (differenced) Nvidia training series.
# stationary = TRUE restricts the search to d = 0 models; the ic vector is
# collapsed by match.arg() to its first element ("aicc"); trace = TRUE prints
# every candidate model considered.
arima_model1 <- auto.arima(train_data1, stationary = TRUE, ic = c("aicc", "aic", "bic"),
trace = TRUE)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(2,0,2) with non-zero mean : -1502.19
## ARIMA(0,0,0) with non-zero mean : -1475.995
## ARIMA(1,0,0) with non-zero mean : -1500.485
## ARIMA(0,0,1) with non-zero mean : -1494.591
## ARIMA(0,0,0) with zero mean : -1474.638
## ARIMA(1,0,2) with non-zero mean : -1505.109
## ARIMA(0,0,2) with non-zero mean : -1508.164
## ARIMA(0,0,3) with non-zero mean : -1506.108
## ARIMA(1,0,1) with non-zero mean : -1501.82
## ARIMA(1,0,3) with non-zero mean : Inf
## ARIMA(0,0,2) with zero mean : -1506.2
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(0,0,2) with non-zero mean : -1508.038
##
## Best model: ARIMA(0,0,2) with non-zero mean
# Same automatic selection for the AMD training series.
arima_model2 <- auto.arima(train_data2, stationary = TRUE, ic = c("aicc", "aic", "bic"),
trace = TRUE)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(2,0,2) with non-zero mean : -1388.553
## ARIMA(0,0,0) with non-zero mean : -1379.434
## ARIMA(1,0,0) with non-zero mean : -1390.582
## ARIMA(0,0,1) with non-zero mean : -1388.651
## ARIMA(0,0,0) with zero mean : -1378.017
## ARIMA(2,0,0) with non-zero mean : -1391.91
## ARIMA(3,0,0) with non-zero mean : -1388.903
## ARIMA(2,0,1) with non-zero mean : -1389.856
## ARIMA(1,0,1) with non-zero mean : -1390.969
## ARIMA(3,0,1) with non-zero mean : -1387.702
## ARIMA(2,0,0) with zero mean : -1390.157
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(2,0,0) with non-zero mean : -1393.154
##
## Best model: ARIMA(2,0,0) with non-zero mean
# Same automatic selection for the Intel training series.
arima_model3 <- auto.arima(train_data3, stationary = TRUE, ic = c("aicc", "aic", "bic"),
trace = TRUE)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(2,0,2) with non-zero mean : -1588.275
## ARIMA(0,0,0) with non-zero mean : -1543.129
## ARIMA(1,0,0) with non-zero mean : -1581.114
## ARIMA(0,0,1) with non-zero mean : -1571.65
## ARIMA(0,0,0) with zero mean : -1545.126
## ARIMA(1,0,2) with non-zero mean : -1590.359
## ARIMA(0,0,2) with non-zero mean : -1593.295
## ARIMA(0,0,3) with non-zero mean : -1591.361
## ARIMA(1,0,1) with non-zero mean : -1580.727
## ARIMA(1,0,3) with non-zero mean : -1595.483
## ARIMA(2,0,3) with non-zero mean : -1592.761
## ARIMA(1,0,4) with non-zero mean : -1594.294
## ARIMA(0,0,4) with non-zero mean : -1590.563
## ARIMA(2,0,4) with non-zero mean : -1590.985
## ARIMA(1,0,3) with zero mean : -1597.513
## ARIMA(0,0,3) with zero mean : -1593.39
## ARIMA(1,0,2) with zero mean : -1592.383
## ARIMA(2,0,3) with zero mean : -1594.831
## ARIMA(1,0,4) with zero mean : -1596.333
## ARIMA(0,0,2) with zero mean : -1595.311
## ARIMA(0,0,4) with zero mean : -1592.598
## ARIMA(2,0,2) with zero mean : -1590.304
## ARIMA(2,0,4) with zero mean : -1593.038
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(1,0,3) with zero mean : -1597.997
##
## Best model: ARIMA(1,0,3) with zero mean
# Coefficients, fit statistics and in-sample error measures for each selected
# model. (MPE/MAPE are Inf/-Inf because the differenced series crosses zero.)
summary(arima_model1)
## Series: train_data1
## ARIMA(0,0,2) with non-zero mean
##
## Coefficients:
## ma1 ma2 mean
## -0.2425 0.2133 0.0031
## s.e. 0.0512 0.0518 0.0015
##
## sigma^2 = 0.0009268: log likelihood = 758.07
## AIC=-1508.15 AICc=-1508.04 BIC=-1492.55
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -9.84976e-06 0.03031723 0.02099743 -Inf Inf 0.6011294
## ACF1
## Training set -0.0008235515
summary(arima_model2)
## Series: train_data2
## ARIMA(2,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 mean
## -0.1728 0.1034 0.0035
## s.e. 0.0520 0.0520 0.0017
##
## sigma^2 = 0.00127: log likelihood = 700.63
## AIC=-1393.27 AICc=-1393.15 BIC=-1377.67
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -2.73513e-06 0.03548796 0.02496617 -Inf Inf 0.6317874
## ACF1
## Training set -0.001178684
summary(arima_model3)
## Series: train_data3
## ARIMA(1,0,3) with zero mean
##
## Coefficients:
## ar1 ma1 ma2 ma3
## -0.8227 0.5472 -0.0078 0.2928
## s.e. 0.0758 0.0835 0.0605 0.0494
##
## sigma^2 = 0.0007219: log likelihood = 804.08
## AIC=-1598.16 AICc=-1598 BIC=-1578.66
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.0002327006 0.02672 0.01696816 Inf Inf 0.6269938 -0.01336257
# Ljung-Box tests on the residuals. The small p-values below (< 0.05) indicate
# statistically significant remaining autocorrelation in the residuals of all
# three models, i.e. the fitted ARMA structures do not fully capture the
# dependence in the returns.
checkresiduals(arima_model1) #diagnostic checking
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,2) with non-zero mean
## Q* = 29.266, df = 7, p-value = 0.0001294
##
## Model df: 3. Total lags used: 10
checkresiduals(arima_model2) #diagnostic checking
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,0) with non-zero mean
## Q* = 19.422, df = 7, p-value = 0.006963
##
## Model df: 3. Total lags used: 10
checkresiduals(arima_model3) #diagnostic checking
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,3) with zero mean
## Q* = 17.708, df = 6, p-value = 0.007005
##
## Model df: 4. Total lags used: 10
# Refit each series with stats::arima() using the orders chosen by auto.arima
# above: (0,0,2) for NVDA, (2,0,0) for AMD, (1,0,3) for INTC.
# NOTE(review): these refits duplicate arima_model1/2/3 (up to arima() fitting
# the INTC intercept that auto.arima dropped) — the auto.arima objects could be
# reused directly; confirm the duplication is intentional.
arima1 <- arima(train_data1, order = c(0, 0, 2))
arima2 <- arima(train_data2, order = c(2, 0, 0))
arima3 <- arima(train_data3, order = c(1, 0, 3))
summary(arima1)
##
## Call:
## arima(x = train_data1, order = c(0, 0, 2))
##
## Coefficients:
## ma1 ma2 intercept
## -0.2425 0.2133 0.0031
## s.e. 0.0512 0.0518 0.0015
##
## sigma^2 estimated as 0.0009191: log likelihood = 758.07, aic = -1508.15
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -9.84976e-06 0.03031723 0.02099743 -Inf Inf 0.6011294
## ACF1
## Training set -0.0008235515
summary(arima2)
##
## Call:
## arima(x = train_data2, order = c(2, 0, 0))
##
## Coefficients:
## ar1 ar2 intercept
## -0.1728 0.1034 0.0035
## s.e. 0.0520 0.0520 0.0017
##
## sigma^2 estimated as 0.001259: log likelihood = 700.63, aic = -1393.27
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -2.73513e-06 0.03548796 0.02496617 -Inf Inf 0.6317874
## ACF1
## Training set -0.001178684
summary(arima3)
##
## Call:
## arima(x = train_data3, order = c(1, 0, 3))
##
## Coefficients:
## ar1 ma1 ma2 ma3 intercept
## -0.8228 0.5472 -0.0078 0.2928 -0.0002
## s.e. 0.0758 0.0835 0.0605 0.0494 0.0014
##
## sigma^2 estimated as 0.0007139: log likelihood = 804.09, aic = -1596.19
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -1.366658e-05 0.02671909 0.01697492 Inf Inf 0.6272434 -0.01337021
# 100-step-ahead forecasts of the differenced (return-scale) series.
forecast1 <- forecast(arima1, h = 100)
forecast2 <- forecast(arima2, h = 100)
forecast3 <- forecast(arima3, h = 100)
plot(forecast1)
plot(forecast2)
plot(forecast3)
# Overlay each forecast with the corresponding full series.
# NOTE(review): arima1/2/3 were fitted on the DIFFERENCED series, while `a`
# below is the undifferenced log-price series — the two layers are therefore on
# different scales; confirm this overlay is intended.
# NOTE(review): h = 200 here but h = 100 for the other two stocks — confirm.
forecast_ori1 <- forecast(arima1, h = 200)
a <- ts(indf_log1)
forecast_ori1 %>% autoplot() + autolayer(a)
forecast_ori2 <- forecast(arima2, h = 100)
a <- ts(indf_log2)
forecast_ori2 %>% autoplot() + autolayer(a)
forecast_ori3 <- forecast(arima3, h = 100)
a <- ts(indf_log3)
forecast_ori3 %>% autoplot() + autolayer(a)
## Implementing the STL Approach for Stock Price Forecasting
# Re-download the raw closing prices (same tickers and window as at the top of
# the script) so the STL section works on price levels rather than the log/
# differenced data. The calls are identical to the earlier download.
nvidia <- getSymbols("NVDA", src="yahoo", from="2019-03-09", to = "2021-03-09",auto.assign = FALSE)
amd <- getSymbols("AMD", src="yahoo", from="2019-03-09", to = "2021-03-09",auto.assign = FALSE)
intel <- getSymbols("INTC", src="yahoo", from="2019-03-09", to = "2021-03-09",auto.assign = FALSE)
indf_data1 <- Cl(nvidia)
indf_data2 <- Cl(amd)
indf_data3 <- Cl(intel)
# --- STL + ARIMA forecast for Nvidia ---
# Treat the daily closing series as a ts with period 7 so stl() can extract a
# weekly seasonal component. (Trading data has ~5 observations per calendar
# week; frequency 7 is the author's choice — TODO confirm.)
freq <- 7
adj <- ts(indf_data1, frequency = freq)
whole.periods <- floor(nrow(adj) / freq)  # number of complete 7-obs periods
partial.periods <- nrow(adj) %% freq      # leftover observations in the last period
desired.test <- 3                         # hold out the final 3 observations
# Locate the end of the training window in (period, phase) coordinates.
training.end.row <- whole.periods + 1
training.end.col <- ifelse(partial.periods == 0, freq - desired.test, freq - partial.periods - desired.test)
# If the partial period cannot supply the whole test set, step back one full
# period. (This branch overwrites both assignments above when it runs.)
if(partial.periods < desired.test){
training.end.row <- whole.periods
training.end.col <- freq - (desired.test - partial.periods)
}
# Split the series into training and test windows at the computed boundary.
training.ts <- window(adj, c(1,1), c(training.end.row,training.end.col))
testing.ts <- window(adj, c(training.end.row, training.end.col + 1))
# STL decomposition with a periodic seasonal window.
fit.stl <- stl(training.ts[,1], s.window = "period")
plot(fit.stl, main="STL Decomposition")
# stlf(): forecast the seasonally adjusted series with ARIMA, then re-seasonalise.
forecasted.adj <- stlf(training.ts[,1], s.window = "period", method="arima", h=desired.test)
# NOTE(review): the plot titles say "NWN" but this series is NVDA — the label
# looks like a leftover from another analysis; confirm.
plot(forecasted.adj, main="Forecasts of NWN from STL and ARIMA (w/o weekly data)")
# Compare forecast accuracy on the training window and the held-out test set.
accuracy(forecasted.adj, testing.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0002197383 2.461269 1.677249 -0.1328148 2.142144 0.4189962
## Test set -8.9809265685 9.231201 8.980927 -6.6214605 6.621461 2.2435392
## ACF1 Theil's U
## Training set -0.005841008 NA
## Test set -0.111703355 2.454408
# Zoom in on the end of the series to inspect the forecast region.
plot(forecasted.adj, main="Forecasts of NWN from STL and ARIMA (w/o month data)", xlim = c(60, 75))
# --- STL + ARIMA forecast for AMD (same procedure as for Nvidia above) ---
freq <- 7
adj <- ts(indf_data2, frequency = freq)
whole.periods <- floor(nrow(adj) / freq)  # number of complete 7-obs periods
partial.periods <- nrow(adj) %% freq      # leftover observations in the last period
desired.test <- 3                         # hold out the final 3 observations
# Locate the end of the training window in (period, phase) coordinates.
training.end.row <- whole.periods + 1
training.end.col <- ifelse(partial.periods == 0, freq - desired.test, freq - partial.periods - desired.test)
# If the partial period cannot supply the whole test set, step back one full
# period. (This branch overwrites both assignments above when it runs.)
if(partial.periods < desired.test){
training.end.row <- whole.periods
training.end.col <- freq - (desired.test - partial.periods)
}
# Split the series into training and test windows at the computed boundary.
training.ts <- window(adj, c(1,1), c(training.end.row,training.end.col))
testing.ts <- window(adj, c(training.end.row, training.end.col + 1))
# STL decomposition, then STL + ARIMA forecast of the hold-out horizon.
fit.stl <- stl(training.ts[,1], s.window = "period")
plot(fit.stl, main="STL Decomposition")
forecasted.adj <- stlf(training.ts[,1], s.window = "period", method="arima", h=desired.test)
# NOTE(review): "NWN" in the titles does not match this AMD series — confirm.
plot(forecasted.adj, main="Forecasts of NWN from STL and ARIMA (w/o weekly data)")
# Compare forecast accuracy on the training window and the held-out test set.
accuracy(forecasted.adj, testing.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.000157209 1.821370 1.235350 -0.08391814 2.360181 0.3817300
## Test set -2.664573613 3.055589 2.664574 -3.19070193 3.190702 0.8233679
## ACF1 Theil's U
## Training set -0.006049113 NA
## Test set -0.010945417 0.9274687
# Zoom in on the end of the series to inspect the forecast region.
plot(forecasted.adj, main="Forecasts of NWN from STL and ARIMA (w/o month data)", xlim = c(60, 75))
# --- STL + ARIMA forecast for Intel (same procedure as for Nvidia above) ---
freq <- 7
adj <- ts(indf_data3, frequency = freq)
whole.periods <- floor(nrow(adj) / freq)  # number of complete 7-obs periods
partial.periods <- nrow(adj) %% freq      # leftover observations in the last period
desired.test <- 3                         # hold out the final 3 observations
# Locate the end of the training window in (period, phase) coordinates.
training.end.row <- whole.periods + 1
training.end.col <- ifelse(partial.periods == 0, freq - desired.test, freq - partial.periods - desired.test)
# If the partial period cannot supply the whole test set, step back one full
# period. (This branch overwrites both assignments above when it runs.)
if(partial.periods < desired.test){
training.end.row <- whole.periods
training.end.col <- freq - (desired.test - partial.periods)
}
# Split the series into training and test windows at the computed boundary.
training.ts <- window(adj, c(1,1), c(training.end.row,training.end.col))
testing.ts <- window(adj, c(training.end.row, training.end.col + 1))
# STL decomposition, then STL + ARIMA forecast of the hold-out horizon.
fit.stl <- stl(training.ts[,1], s.window = "period")
plot(fit.stl, main="STL Decomposition")
# NOTE(review): lambda = 4 applies a Box-Cox transform with an unusual exponent
# and is inconsistent with the NVDA/AMD calls above (no lambda) — confirm intent.
forecasted.adj <- stlf(training.ts[,1], s.window = "period", method="arima", h=desired.test, lambda = 4)
# NOTE(review): "NWN" in the titles does not match this INTC series — confirm.
plot(forecasted.adj, main="Forecasts of NWN from STL and ARIMA (w/o weekly data)")
# Compare forecast accuracy on the training window and the held-out test set.
accuracy(forecasted.adj, testing.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01763849 1.408657 0.8949262 -0.006897264 1.672304 0.3597486
## Test set -1.58380541 1.887049 1.5838054 -2.611414023 2.611414 0.6366690
## ACF1 Theil's U
## Training set -0.01084741 NA
## Test set -0.15526621 1.078559
# Zoom in on the end of the series to inspect the forecast region.
plot(forecasted.adj, main="Forecasts of NWN from STL and ARIMA (w/o month data)", xlim = c(60, 75))